# This is the code for effects of exercise on self-esteem in Lin and Vella 2024
# Date: April 16, 2024
rm(list = ls())
ptm <- proc.time()

library(foreach)
library(doParallel)
numCores <- detectCores()
registerDoParallel(numCores)


set.seed(10000)
##setwd('.....') set working directory that data saved
data=read.csv("Exercise_20200805.csv", header = TRUE)
n=(dim(data))[1]

esteem=data$esteem

exercise=data$exercise

aid=data$aid
friendm=data[,7:16]
intelligence=data$intelligence
white=data$white
female=data$female
income=exp(data$logincome)/1000
age=data$age
gpa=data$gpa



# friendm[is.na(friendm)]=0
# SF=matrix(rep(0,n^2),n,n) friendship data is saved as n*10 matrix in the original format and needed to be transformed to n*n matrix

SF=read.csv(file.choose(), header = TRUE, sep = ",")

# for(i in 1:n){   	cat(i,"\n")
#   for(j in 1:10){
#     for(k in 1:n){
#       if(friendm[i,j]==aid[k]){SF[i,k]=1}
#     }
#   }
# }
# 
# write.csv(SF,"Friend_Exercise_20200805.csv",row.names=FALSE)

NF = rowSums(SF);
invNF =1/NF;#1 over number of friend
invNF[which(!is.finite(invNF))]=0;

D=exercise>2; 
Y=esteem






library(sampleSelection)
intel1=as.numeric(intelligence<3)
intel2=as.numeric(intelligence>2 &intelligence<5)
intel3=as.numeric(intelligence>4)


X=cbind(age,female,gpa,intel2,intel3,white,income);
dx=dim(X)[2]
YO1 <- YO2 <- Y
YO1[D == 1] <- NA
YO2[D == 0] <- NA
XO1 <- XO2 <- X
XO1[D == 1, ] <- NA
XO2[D == 0, ] <- NA


###Bivariate Probit
library(VGAM)
library(mvtnorm)
YY=Y>median(Y)
# YY=Y>=quantile(Y,0.25)
# YY=Y>=quantile(Y,0.75)
# YY=Y>quantile(Y,0.10)
# YY=Y>quantile(Y,0.20)
# YY=Y>quantile(Y,0.30)
# YY=Y>quantile(Y,0.40)
# YY=Y>quantile(Y,0.60)
# YY=Y>quantile(Y,0.70)
# YY=Y>quantile(Y,0.80)
# YY=Y>quantile(Y,0.90)

probits = glm(D~age+female+gpa+intel2+intel3+white+income,family=binomial(link="probit"));
# probito = glm(YY~age+female+gpa+intel2+intel3+white+income+D,family=binomial(link="probit"));
BNES=predict(probits,type="response");
#BNEO=predict(probito,type="response");
BNEO=BNES;
BNESUpdate=rep(0,n);
BNEOUpdate=rep(0,n);
BNES=as.vector(BNES);
BNEO=as.vector(BNEO);
BNESUpdate=as.vector(BNESUpdate);
BNEOUpdate=as.vector(BNEOUpdate);


XX=cbind(1,age,female,gpa,intel2,intel3,white,income)

V=as.matrix(SF)%*%as.vector(BNES)*as.vector(invNF)###Initial Social Interactions term
W=as.matrix(SF)%*%as.vector(BNEO)*as.vector(invNF)
datam=data.frame(D,YY,age,female,gpa,intel2,intel3,white,income,V,W)

treat.eq <- D ~ age+female+gpa+intel2+intel3+white+income+V
out.eq <- YY ~age+female+gpa+intel2+intel3+white+income+D+W
f.list <- list(treat.eq, out.eq)
mr <- c("probit", "probit")

library(GJRM)



err=10;
err.tol=1E-3;
npl.reps = 0;
tot.reps = 100;
quiet=FALSE;

while(err>err.tol&npl.reps<tot.reps){
  npl.reps = npl.reps + 1;
  bipro=gjrm(f.list, data=datam,model="B", margins=mr)
  betasestbi=as.numeric(bipro$coefficients);
  betas=betasestbi[1:(dx+2)];
  betao=betasestbi[(dx+3):(2*dx+5)]
  sest=as.numeric(bipro$theta)
  BNESUpdate=pnorm(cbind(1,X,V)%*%betas);
  part1=(pnorm(cbind(1,X,D,W)%*%betao)-pbinorm(cbind(1,X,D,W)%*%betao,cbind(1,X,V)%*%betas,cov12=sest))/(1-BNESUpdate)
  part2=D*(pbinorm(cbind(1,X,D,W)%*%betao,cbind(1,X,V)%*%betas,cov12=sest)/BNESUpdate-(pnorm(cbind(1,X,D,W)%*%betao)
                                                                                       -pbinorm(cbind(1,X,D,W)%*%betao,cbind(1,X,V)%*%betas,cov12=sest))/(1-BNESUpdate))
  BNEOUpdate=part1+part2;
  err=norm(as.matrix(cbind(BNESUpdate,BNEOUpdate))-as.matrix(cbind(BNES,BNEO)),"f")/sqrt(n);
  BNES=BNESUpdate;
  BNEO=BNEOUpdate;
  datam$V=as.matrix(SF)%*%as.vector(BNES)*as.vector(invNF)
  datam$W=as.matrix(SF)%*%as.vector(BNEO)*as.vector(invNF)
  cat(npl.reps, "iteration, distance:",err,"\n")
  if(npl.reps==tot.reps & err>err.tol) {
    cat("The NPL algorithm did not converge. Try increase reps or reduce tolerance");
  }
  
}
summary(bipro)


probito = glm(YY~age+female+gpa+intel2+intel3+white+income+D,family=binomial(link="probit"));
summary(probito)
coefo=coef(probito)

AME=mean(pnorm(coefo[1]+coefo[2]*age+coefo[3]*female+coefo[4]*gpa+coefo[5]*intel2+coefo[6]*intel3+
                 coefo[7]*white+coefo[8]*income+coefo[9])-pnorm(coefo[1]+coefo[2]*age+coefo[3]*female
                                                                +coefo[4]*gpa+coefo[5]*intel2+coefo[6]*intel3+coefo[7]*white+coefo[8]*income))
AME


treatno.eq <- D ~ age+female+gpa+intel2+intel3+white+income
outno.eq <- YY ~age+female+gpa+intel2+intel3+white+D
fno.list <- list(treatno.eq, outno.eq)
mr <- c("probit", "probit")

biprono=gjrm(fno.list, data=datam,model="B", margins=mr)

summary(biprono)

b=as.numeric(biprono$coefficients)
bs=b[1:(dx+1)];
bo=b[(dx+2):(2*dx+2)]
sg=as.numeric(biprono$theta)


EMEno=pbinorm(cbind(1,age,female,gpa,intel2,intel3,white,1)%*%bo,cbind(1,age,female,gpa,intel2,intel3,white,income)%*%bs,cov12=sg)/pnorm(cbind(1,age,female,gpa,intel2,intel3,white,income)%*%bs)
-(pnorm(cbind(1,age,female,gpa,intel2,intel3,white,0)%*%bo)
  -pbinorm(cbind(1,age,female,gpa,intel2,intel3,white,0)%*%bo,cbind(1,age,female,gpa,intel2,intel3,white,income)%*%bs,cov12=sg))/(1-pnorm(cbind(1,age,female,gpa,intel2,intel3,white,income)%*%bs));
AMEno=mean(EMEno)

AMEno


IMR=invMillsRatio(probits,all=TRUE)
GR=rep(0,n)
GR[D==1]=(IMR$IMR1)[D==1]
GR[D==0]=-(IMR$IMR0)[D==0]
probitoo = glm(YY~age+female+gpa+intel2+intel3+white+income+D+GR,family=binomial(link="probit"));

summary(probitoo)


estols=lm(Y~X+D)

ptm <- proc.time()
####Counterfactual####
###1st Counterfactual to have income increase for bottom 10% individuals (poorest)
PS1=PS2=PS3=BNES
PO1=PO2=PO3=BNEO
ilist=cbind(1:n,income)
ilist=ilist[order(ilist[,2],decreasing = FALSE),]
index=rep(0,n)
index[ilist[1:1000,1]]=1
indexc=rep(0,n)
indexc[ilist[1:1000,1]]=0.5 ###increase the single index of bottom 1000 income by 0.5

V=as.matrix(SF)%*%as.vector(PS1)*as.vector(invNF)###Initial Social Interactions term
W=as.matrix(SF)%*%as.vector(PO1)*as.vector(invNF)

tC1=10
totC1=100;
repsC1=0;
while (tC1 > 1E-5&repsC1<totC1){
  repsC1=repsC1+1;
  V=as.matrix(SF)%*%as.vector(PS1)*as.vector(invNF)
  PSC1=pnorm(cbind(1,X,V)%*%betas+indexc);
  tC1=norm(PSC1-PS1,"f")/sqrt(n);
  PS1=PSC1;
}

B=100
CAME1=matrix(rep(0,B*(2*1000+2*n+1)),B,2*1000+2*n+1)

CAME1= foreach(i=1:B, .combine=rbind) %dopar% {
  # for(b in 1:B){print(b)
  library(mvtnorm)
  library(VGAM)
  vc = diag(2)
  covs=bipro$theta
  vc[lower.tri(vc)] = covs
  vc[upper.tri(vc)] = vc[lower.tri(vc)]
  eps = rmvnorm(n, rep(0, 2), vc)
  DC1= (cbind(1,X,V)%*%betas+indexc+eps[,1]>0)
  
  tO1=10
  totO1=100;
  repsO1=0;
  while (tO1 > 1E-5&repsO1<totO1){
    repsO1=repsO1+1;
    W=as.matrix(SF)%*%as.vector(PO1)*as.vector(invNF)
    pO11=DC1*pbinorm(cbind(1,X,DC1,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexc)
    pO12=(1-DC1)*(pnorm(cbind(1,X,DC1,W)%*%betao)-pbinorm(cbind(1,X,DC1,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexc))
    POC1 = pO11+pO12
    tO1=norm(POC1-PO1,"f")/sqrt(n);
    PO1=POC1;
  }
  
  
  OC1=(cbind(1,X,DC1,W)%*%betao+eps[,2]>0)
  ###Marginal Effects
  EME=rep(0,1000)
  POE1=rep(0,n)
  POE0=rep(0,n)
  for(j in 1:1000){
    k=ilist[j,1]
    if(DC1[k]==1){
      POE1=PO1;###Get P_O^1
      DC=DC1;DC[k]=0;
      tC=10
      PYC0=rep(0,n);
      PYC1=rep(0,n);
      totC=100;
      repsC=0;
      while (tC > 1E-5&repsC<totC){
        repsC=repsC+1;
        W=as.matrix(SF)%*%as.vector(PYC0)*as.vector(invNF)
        p1=DC*pbinorm(cbind(1,X,DC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexc)
        p2=(1-DC)*(pnorm(cbind(1,X,DC,W)%*%betao)
                   -pbinorm(cbind(1,X,DC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexc))
        PYC1 = p1+p2
        tC=norm(PYC1-PYC0,"f")/sqrt(n);
        PYC0=PYC1;
      }
      POE0=PYC0 ###Get P_O^0
    }  else{POE0=PO1;###Get P_O^0
    DCC=DC1;DCC[k]=1;
    tCC=10
    PYCC0=rep(0,n);
    PYCC1=rep(0,n);
    totCC=100;
    repsCC=0;
    while (tCC > 1E-5&repsCC<totCC){
      repsCC=repsCC+1;
      W=as.matrix(SF)%*%as.vector(PYCC0)*as.vector(invNF)
      pp1=DCC*pbinorm(cbind(1,X,DCC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexc)
      pp2=(1-DCC)*(pnorm(cbind(1,X,DCC,W)%*%betao)-pbinorm(cbind(1,X,DCC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexc))
      PYCC1 = pp1+pp2
      tCC=norm(PYCC1-PYCC0,"f")/sqrt(n);
      PYCC0=PYCC1;
    }
    POE1=PYCC0 ###Get P_O^1
    }
    SY1=as.matrix(SF)%*%as.vector(POE1)*as.vector(invNF)
    SY0=as.matrix(SF)%*%as.vector(POE0)*as.vector(invNF)
    
    EME[j]=pbinorm((cbind(1,X,1,SY1)%*%betao)[k],(cbind(1,X,V)%*%betas+indexc)[k],cov12=sest)/pnorm((cbind(1,X,V)%*%betas+indexc)[k])
    -(pnorm((cbind(1,X,0,SY0)%*%betao)[k])
      -pbinorm((cbind(1,X,0,SY0)%*%betao)[k],(cbind(1,X,V)%*%betas+indexc)[k],cov12=sest))/(1-pnorm((cbind(1,X,V)%*%betas+indexc)[k]))
  }
  CAME1=c(mean(EME),DC1[ilist[1:1000,1]],OC1[ilist[1:1000,1]],DC1,OC1)
}

proc.time() - ptm


ptm <- proc.time()
###2nd Counterfactual to have index increase for top 10% popular individuals
IL = colSums(SF);
olist=cbind(1:n,IL)
olist=olist[order(olist[,2],decreasing = TRUE),]
index2=rep(0,n)
index2[olist[1:1000,1]]=1

indexl=rep(0,n)
indexl[olist[1:1000,1]]=0.5
V=as.matrix(SF)%*%as.vector(PS2)*as.vector(invNF)###Initial Social Interactions term
W=as.matrix(SF)%*%as.vector(PO2)*as.vector(invNF)

tC2=10
totC2=100;
repsC2=0;
while (tC2 > 1E-5&repsC2<totC2){
  repsC2=repsC2+1;
  V=as.matrix(SF)%*%as.vector(PS2)*as.vector(invNF)
  PSC2=pnorm(cbind(1,X,V)%*%betas+indexl);
  tC2=norm(PSC2-PS2,"f")/sqrt(n);
  PS2=PSC2;
}

CAME2=matrix(rep(0,B*(2*1000+2*n+1)),B,2*1000+2*n+1)


CAME2= foreach(i=1:B, .combine=rbind) %dopar% {
  # for(b in 1:B){print(b)
  library(mvtnorm)
  library(VGAM)
  vc = diag(2)
  covs=bipro$theta
  vc[lower.tri(vc)] = covs
  vc[upper.tri(vc)] = vc[lower.tri(vc)]
  eps = rmvnorm(n, rep(0, 2), vc)
  DC2= (cbind(1,X,V)%*%betas+indexl+eps[,1]>0)
  
  tO2=10
  totO2=100;
  repsO2=0;
  while (tO2 > 1E-5&repsO2<totO2){
    repsO2=repsO2+1;
    W=as.matrix(SF)%*%as.vector(PO2)*as.vector(invNF)
    pO21=DC2*pbinorm(cbind(1,X,DC2,W)%*%betao,cbind(1,X,V)%*%betas+indexl,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexl)
    pO22=(1-DC2)*(pnorm(cbind(1,X,DC2,W)%*%betao)-pbinorm(cbind(1,X,DC2,W)%*%betao,cbind(1,X,V)%*%betas+indexl,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexl))
    POC2 = pO21+pO22
    tO2=norm(POC2-PO2,"f")/sqrt(n);
    PO2=POC2;
  }
  OC2=(cbind(1,X,DC2,W)%*%betao+eps[,2]>0)
  EME=rep(0,1000)
  POE1=rep(0,n)
  POE0=rep(0,n)
  for(j in 1:1000){
    k=olist[j,1]
    if(DC2[k]==1){
      POE1=PO1;###Get P_O^1
      DC=DC2;DC[k]=0;
      tC=10
      PYC0=rep(0,n);
      PYC1=rep(0,n);
      totC=100;
      repsC=0;
      while (tC > 1E-5&repsC<totC){
        repsC=repsC+1;
        W=as.matrix(SF)%*%as.vector(PYC0)*as.vector(invNF)
        p1=DC*pbinorm(cbind(1,X,DC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexc)
        p2=(1-DC)*(pnorm(cbind(1,X,DC,W)%*%betao)
                   -pbinorm(cbind(1,X,DC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexc))
        PYC1 = p1+p2
        tC=norm(PYC1-PYC0,"f")/sqrt(n);
        PYC0=PYC1;
      }
      POE0=PYC0 ###Get P_O^0
    }  else{POE0=PO1;###Get P_O^0
    DCC=DC2;DCC[k]=1;
    tCC=10
    PYCC0=rep(0,n);
    PYCC1=rep(0,n);
    totCC=100;
    repsCC=0;
    while (tCC > 1E-5&repsCC<totCC){
      repsCC=repsCC+1;
      W=as.matrix(SF)%*%as.vector(PYCC0)*as.vector(invNF)
      pp1=DCC*pbinorm(cbind(1,X,DCC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest)/pnorm(cbind(1,X,V)%*%betas+indexc)
      pp2=(1-DCC)*(pnorm(cbind(1,X,DCC,W)%*%betao)-pbinorm(cbind(1,X,DCC,W)%*%betao,cbind(1,X,V)%*%betas+indexc,cov12=sest))/(1-pnorm(cbind(1,X,V)%*%betas+indexc))
      PYCC1 = pp1+pp2
      tCC=norm(PYCC1-PYCC0,"f")/sqrt(n);
      PYCC0=PYCC1;
    }
    POE1=PYCC0 ###Get P_O^1
    }
    SY1=as.matrix(SF)%*%as.vector(POE1)*as.vector(invNF)
    SY0=as.matrix(SF)%*%as.vector(POE0)*as.vector(invNF)
    
    EME[j]=pbinorm((cbind(1,X,1,SY1)%*%betao)[k],(cbind(1,X,V)%*%betas+indexc)[k],cov12=sest)/pnorm((cbind(1,X,V)%*%betas+indexc)[k])
    -(pnorm((cbind(1,X,0,SY0)%*%betao)[k])
      -pbinorm((cbind(1,X,0,SY0)%*%betao)[k],(cbind(1,X,V)%*%betas+indexc)[k],cov12=sest))/(1-pnorm((cbind(1,X,V)%*%betas+indexc)[k]))
  }
  CAME2=c(mean(EME),DC2[olist[1:1000,1]],OC2[olist[1:1000,1]],DC2,OC2)
}



proc.time() - ptm


DB1T=CAME1[,2:(1000+1)]
OB1T=CAME1[,(1000+2):(2*1000+1)]
DB2T=CAME2[,2:(1000+1)]
OB2T=CAME2[,(1000+2):(2*1000+1)]

DB1=CAME1[,(2*1000+2):(2*1000+n+1)]
OB1=CAME1[,(2*1000+n+2):(2*1000+2*n+1)]
DB2=CAME2[,(2*1000+2):(2*1000+n+1)]
OB2=CAME2[,(2*1000+n+2):(2*1000+2*n+1)]



sum(D[ilist[1:1000,1]])
sum(D[olist[1:1000,1]])
sum(YY[ilist[1:1000,1]])
sum(YY[olist[1:1000,1]])

sum(D)
sum(YY)

sum(DB1)/B
sum(DB2)/B

sum(OB1)/B
sum(OB2)/B


SimuD1=rowSums(DB1)
SimuD2=rowSums(DB2)

SimuO1=rowSums(OB1)
SimuO2=rowSums(OB2)

sd(SimuD1)
sd(SimuD2)
sd(SimuO1)
sd(SimuO2)


sum(DB1T)/B
sum(DB2T)/B

sum(OB1T)/B
sum(OB2T)/B


SimuDT1=rowSums(DB1T)
SimuDT2=rowSums(DB2T)

SimuOT1=rowSums(OB1T)
SimuOT2=rowSums(OB2T)

sd(SimuDT1)
sd(SimuDT2)
sd(SimuOT1)
sd(SimuOT2)




mean(CAME1[,1])
mean(CAME2[,1])
